{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 在Minist数据集上进行PCA分析\n",
    "图像数据维数高，而且特征之间（像素之间）相关性很高，因此我们预计用很少的维数就能保留足够多的信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#导入必要的工具包\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.decomposition import PCA\n",
    "import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#读取训练数据和测试数据\n",
    "train = pd.read_csv('./data/MNIST_train.csv')\n",
    "test = pd.read_csv('./data/MNIST_test.csv')\n",
    "\n",
    "y_train = train.label.values\n",
    "X_train = train.drop(\"label\",axis=1).values\n",
    "X_test = test.values "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#将像素值[0,255]  --> [0,1]\n",
    "X_train = X_train / 255.0\n",
    "X_test = X_test / 255.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the shape of train_image: (42000, 784)\n",
      "the shape of test_image: (28000, 784)\n"
     ]
    }
   ],
   "source": [
    "# 原始输入的特征维数和样本数目\n",
    "print('the shape of train_image: {}'.format(X_train.shape))\n",
    "print('the shape of test_image: {}'.format(X_test.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/qing/anaconda2/lib/python2.7/site-packages/sklearn/model_selection/_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "# 将训练集合拆分成训练集和校验集，在校验集上找到最佳的模型超参数（PCA的维数）\n",
    "X_train_part, X_val, y_train_part, y_val = train_test_split(X_train,y_train, train_size = 0.8,random_state = 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(33600, 784)\n",
      "(8400, 784)\n"
     ]
    }
   ],
   "source": [
    "#拆分后的训练集和校验集的样本数目\n",
    "print(X_train_part.shape)\n",
    "print(X_val.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 一个参数点（PCA维数为n）的模型训练和测试，得到该参数下模型在校验集上的预测性能\n",
    "def n_component_analysis(n, X_train, y_train, X_val, y_val):\n",
    "    start = time.time()\n",
    "    \n",
    "    pca = PCA(n_components=n)\n",
    "    print(\"PCA begin with n_components: {}\".format(n));\n",
    "    pca.fit(X_train)\n",
    "    \n",
    "    # 在训练集和测试集降维 \n",
    "    X_train_pca = pca.transform(X_train)\n",
    "    X_val_pca = pca.transform(X_val)\n",
    "    \n",
    "    # 利用SVC训练\n",
    "    print('SVC begin')\n",
    "    clf1 = svm.SVC()\n",
    "    clf1.fit(X_train_pca, y_train)\n",
    "    \n",
    "    # 返回accuracy\n",
    "    accuracy = clf1.score(X_val_pca, y_val)\n",
    "    \n",
    "    end = time.time()\n",
    "    print(\"accuracy: {}, time elaps:{}\".format(accuracy, int(end-start)))\n",
    "    return accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PCA begin with n_components: 0.7\n",
      "SVC begin\n",
      "accuracy: 0.97869047619, time elaps:14\n",
      "PCA begin with n_components: 0.710714285714\n",
      "SVC begin\n",
      "accuracy: 0.979166666667, time elaps:14\n",
      "PCA begin with n_components: 0.721428571429\n",
      "SVC begin\n",
      "accuracy: 0.979404761905, time elaps:14\n",
      "PCA begin with n_components: 0.732142857143\n",
      "SVC begin\n",
      "accuracy: 0.979404761905, time elaps:15\n",
      "PCA begin with n_components: 0.742857142857\n",
      "SVC begin\n",
      "accuracy: 0.980119047619, time elaps:15\n",
      "PCA begin with n_components: 0.753571428571\n",
      "SVC begin\n",
      "accuracy: 0.979761904762, time elaps:15\n",
      "PCA begin with n_components: 0.764285714286\n",
      "SVC begin\n",
      "accuracy: 0.979880952381, time elaps:16\n",
      "PCA begin with n_components: 0.775\n",
      "SVC begin\n",
      "accuracy: 0.979166666667, time elaps:16\n",
      "PCA begin with n_components: 0.785714285714\n",
      "SVC begin\n",
      "accuracy: 0.97880952381, time elaps:18\n",
      "PCA begin with n_components: 0.796428571429\n",
      "SVC begin\n",
      "accuracy: 0.979047619048, time elaps:18\n",
      "PCA begin with n_components: 0.807142857143\n",
      "SVC begin\n",
      "accuracy: 0.979166666667, time elaps:19\n",
      "PCA begin with n_components: 0.817857142857\n",
      "SVC begin\n",
      "accuracy: 0.978928571429, time elaps:20\n",
      "PCA begin with n_components: 0.828571428571\n",
      "SVC begin\n",
      "accuracy: 0.97880952381, time elaps:23\n",
      "PCA begin with n_components: 0.839285714286\n",
      "SVC begin\n",
      "accuracy: 0.97880952381, time elaps:22\n",
      "PCA begin with n_components: 0.85\n",
      "SVC begin\n",
      "accuracy: 0.978095238095, time elaps:24\n"
     ]
    }
   ],
   "source": [
    "# 设置超参数（PCA维数）搜索范围\n",
    "n_s = np.linspace(0.70, 0.85, num=15)\n",
    "accuracy = []\n",
    "for n in n_s:\n",
    "    tmp = n_component_analysis(n, X_train_part, y_train_part, X_val, y_val)\n",
    "    accuracy.append(tmp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0x1a0edd8d50>]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD8CAYAAAC2PJlnAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmYFOW5///3RxBwg6AMSBwFVFzG\nDWWCxg3BBSEijfo1cDTiLyhqNBw1ekRNcjwkRowLicYlJseISZRoDMMiCMjmElEGFRQRHHEDzBE3\nwqLIwP3746mWph2Ynpnuru6Z+3VdfU111VNVd6PT99Szysxwzjnn6mOHuANwzjlXvDyJOOecqzdP\nIs455+rNk4hzzrl68yTinHOu3jyJOOecqzdPIs455+rNk4hzzrl68yTinHOu3prHHUCutWvXzjp3\n7hx3GM45VzTmz5//sZmVZFK20SeRzp07U1lZGXcYzjlXNCS9l2lZr85yzjlXb55EnHPO1ZsnEeec\nc/XmScQ551y9eRJxzjlXb55EnHPO1ZsnEeecc/XmScTl3dix8O67cUfhnMsGTyIur95/HwYPhnPP\nhU2b4o7GOddQnkRcXo0fH37Omwf33x9vLM65hvMk4vJq/Hg46CA47TS4/npYuTLuiJxzDeFJxOXN\nZ5/B7NkwcCDcey9s3AhXXhl3VM65hvAk4vLmySdDO0giAfvtBz/7GTz+eNjvnCtOnkRc3lRUQMeO\nUF4e3l9zDZSVweWXw7p18cbmnKsfTyIuL778Ep56CgYMgB2i/+tatAiN6++9ByNHxhufc65+PIm4\nvJgxIzxtJBJb7z/hBBg6FO64AxYujCc251z9ZZREJJ0uaYmkKkkjajjeSdIMSQslzZZUmnLs15IW\nSVos6S5JivZ3l/RadM3U/btLmi7prehn22i/onJV0X2Oys4/gcuHigpo3Rp69frmsV//GnbfHS65\nBDZvzn9szrn6qzWJSGoG3AP0BcqAwZLK0ordDjxsZocDI4FbonOPBY4DDgcOBb4D9IzOuQ8YBnSN\nXqdH+0cAM8ysKzAjek90/2TZYdH5rghs2gQTJkDfvqEKK93uu4cnkblz4YEH8h+fc67+MnkS6QFU\nmdkyM/sKGAsMSCtTRvjCB5iVctyAVkALoCWwI/B/kjoCrc3sBTMz4GEgWdExABgTbY9J2/+wBXOB\nb0XXcQVu7lz46KNvVmWlOv986N0bRoyAf/0rf7E55xomkySyF/BByvvl0b5UC4Czo+2BwG6S9jCz\nFwhJ5cPoNdXMFkfnL9/GNTuY2YcA0c/2dYjDFaDx42HHHcOTyLZIcN99oQH+qqvyF5tzrmEySSKq\nYZ+lvb8G6CnpFUJ11QqgWtL+wMFAKeELv7ekEzO8Zn3iCAWlYZIqJVWuWrWqlsu6XDKDcePCU0ab\nNtsve8ABcMMNYYLGqVPzE59zrmEySSLLgb1T3pcCW01WYWYrzewsMzsSuDHat5rwVDLXzNaa2Vpg\nCnBMdM3SbVwzWd1F9POjTONIiecBMys3s/KSkpIMPqLLlcWLoapq+1VZqa67Dg48EH70I/jii9zG\n5pxruEySyDygq6QukloAg4AJqQUktZOUvNb1wIPR9vuEJ5TmknYkPKUsjqqp1kg6JuqVdQEQTc3H\nBGBItD0kbf8FUS+tY4DVyWovV7gqKsLPM8/MrHzLlmHsyLJl8Mtf5i4u51x21JpEzKwauAKYCiwG\nHjOzRZJGSkp+NZwELJG0FOgA3Bzt/zvwNvAaod1kgZlNjI5dBvwRqIrKTIn2jwJOlfQWcGr0HmAy\nsCwq/wfgR/X5wC6/KiqgRw/49rczP+ekk+DCC0PX30WLchWZcy4bFDpHNV7l5eVWWVkZdxhN0ooV\nUFoKv/pVmLG3Lj7+OMz2e9BB8MwzW0a5O+dyT9J8MyvPpKz/arqcmRBVembaHpKqXTu4/XZ4/nl4\n8MHayzvn4uFJxOVMRQV07RqeJupjyBDo2RP+67/COBPnXOHxJOJy4vPPYebM8BSimjpnZ0AKjexr\n18JPfpLd+Jxz2eFJxOXElClQXV2/qqxUBx0URrH/5S/w9NPZic05lz2eRFxOjB8PHTrA0Uc3/Fo3\n3AD77w+XXRZGtDvnCocnEZd1GzbA5MlhbEizZg2/XqtWYUqUqqrQ0yuf1q6FDz6ovZxzTZUnEZd1\ns2bBmjVhAapsOeWUMEnjqFHw5pvZu+62fPUV3H037LtvWH1xzZrc39O5YuRJxGVdRQXssgucfHJ2\nr3vHHbDrrnDppWFOrlzYvBn++tfQFjN8OOy5Z3ga8bm8nKuZJxGXVZs3b1k7pFWr7F67ffswin3O\nHBgzpvbydWEWquCOPDI88bRpE5bzffnlMGYlOX2Lc25rnkRcVs2bBx9+2PBeWdvywx/C8cfDNdeE\nUe3ZMHdumGrle98LTx2PPALz50OfPtC8OfTvD5MmwcaN2bmfc42JJxGXVRUVoTG9X7/cXH+HHcLY\nkdWrQyJpiMWLYeBA+O53QzvL734X9g0evPU0K4lEuN+cOQ27n3ONkScRl1UVFeGv+rZtc3ePQw4J\no9jHjIHZs+t+/gcfwNChcOihMGMG/OIX8PbbcPnlNS/fe8opsNNOoduyc25rnkRc1ixZEv6iz1VV\nVqqf/jT0nLrkktClOBOffALXXhumYvnLX+A//zNMOf/Tn4YG+23ZeedQtVVRkbsGfeeKlScRlzXJ\nv9Sz2bV3W3baCe69F5YuhVtv3X7ZdevC+JL99gs9vAYNCufdeWdoNM9EIgHLl4eGdufcFp5EXNZU\nVMBRR8Hee9deNhv69AkJ4eabQ1JIt3FjaD/Zf3+48UY48URYuBAeegg6darbvc44I7STeC8t57bm\nScRlxYcfhl5O+ajKSjV6dHgqueyyLVVNmzfDY4+FtpPLLgtPIM8+G7oeH3po/e6zxx5wwgneLuJc\nuoySiKTTJS2RVCVpRA3HO0maIWmhpNmSSqP9vSS9mvL6UlIiOtZb0suSXpc0RlLzaP+1KeVfl7RJ\n0u7RsXclvRYd85WmCsjEieFLPN9JZM89wyj2mTNDO8f06WElxe9/PzSST5gQEsjxxzf8XokEvPZa\naIR3zkXMbLsvoBlh+dp9gRaEZW7L0so8DgyJtnsDf67hOrsDnwI7E5LXB8AB0bGRwNAazukPzEx5\n/y7QrraYU1/du3c3l3v9+pntu6/Z5s35v/emTWbHHGPWvLkZmO2zj9lDD5lVV2f3PsuWhevfcUd2\nr+tcoQEqLcPv2EyeRHoAVWa2zMy+AsYC6U2nZcCMaHtWDccBzgGmmNl6YA9gg5kla7KnA2fXcM5g\n4NEMYnQxWrMmTNPekLVDGmKHHeCPfwztMaNHh/aRIUOyM/ljqi5d4IgjvF3EuVSZJJG9CE8NScuj\nfakWsCUJDAR2k7RHWplBbEkIHwM7Skqu4XsOsFVzrKSdgdOBJ1J2GzBN0nxJw7YVsKRhkiolVa5a\ntWq7H8413FNPhQkL89Era1sOOQRefBGuvBJatszdfQYMCEv2+v9WzgWZJJGa/rZM7y1/DdBT0itA\nT2AFUP31BaSOwGHAVIDocWkQMFrSS8Ca1PKR/sDzZvZpyr7jzOwooC9wuaQTawrYzB4ws3IzKy8p\nKcngI7qGqKgIXWWPPTbuSHIvkQgN95MmxR2Jc4UhkySynK2fEkqBlakFzGylmZ1lZkcCN0b7VqcU\nORcYZ2YbU855wcxOMLMewDPAW2n3TX1y+fo+0c+PgHGEqjYXo40b4cknw/xSzZvHHU3udesG++zj\nVVrOJWWSROYBXSV1kdSC8OU+IbWApHaSkte6Hngw7RrfaNuQ1D762RK4Drg/5VgbwhPN+JR9u0ja\nLbkNnAa8nkH8LofmzAnzSuW7V1ZcpPBZp00Lgxida+pqTSJmVg1cQaiKWgw8ZmaLJI2UdGZU7CRg\niaSlQAfg5uT5kjoTnmTSp6+7VtJiYCEw0cxmphwbCEwzs9Rf0w7Ac5IWAC8BT5rZU5l+UJcbFRVh\nnMYpp8QdSf4kEmGZ3mnT4o7EufjJGvlkQOXl5VZZ6UNKcsEsjE7/zndg3Li4o8mf6uqwtkn//tlf\n18S5QiBpvpmV117SR6y7Bpg/H1asaDpVWUnNm4dpUCZNCgnFuabMk4irt/HjwxiNM86IO5L8SyTg\n00/huefijsS5eHkScfVWUREmNdwjfURQE9CnT1j+13tpuabOk4irl6oqeP31eAcYxmmXXUJnAl9j\nxDV1nkRcveRz7ZBClUjAe+/BggVxR+JcfDyJuHqpqAjzSHXpEnck8enfP4wb8enhXVPmScTV2Ucf\nwT//2fR6ZaVr3x6OO87bRVzT5knE1dmkSWH+qKZclZU0YAC8+iq8+27ckTgXD08irs4qKsL8Ud26\nxR1J/JKJ1Ku0XFPlScTVybp1YfXAuNYOKTRdu4Zp6D2JuKbKk4irk2nTwrxRTb09JFUiAc88A598\nEnckzuWfJxFXJxUV0LYtnHBC3JEUjkQCNm0KU+I719R4EnEZq66GiRPDNCdNYe2QTHXvDnvt5b20\nXNPkScRl7Nln4bPPvCornRQa2KdOhS++iDsa5/LLk4jLWEVFmC+qT5+4Iyk8iQSsXw9PPx13JM7l\nV0ZJRNLpkpZIqpI0oobjnSTNkLRQ0mxJpdH+XpJeTXl9KSkRHest6WVJr0saI6l5tP8kSatTzvl5\npnG43DELPZBOPTXMG+W21rMntGnjVVqu6ak1iUhqBtwD9AXKgMGSytKK3Q48bGaHAyOBWwDMbJaZ\ndTOzbkBvYD0wLVpKdwwwyMwOBd4DhqRc79nkeWY2sg5xuBxZsCDME+UDDGvWogX06wcTJoRGduea\nikyeRHoAVWa2zMy+AsYC6V8lZcCMaHtWDccBzgGmmNl6YA9gg5ktjY5NB87OQhwuRyoqQt1///5x\nR1K4Egn4+OMwJYxzTUUmSWQv4IOU98ujfakWsCUJDAR2k5S+ysQg4NFo+2NgR0nJ5RfPIazDnvRd\nSQskTZF0SB3icDlSURHmiWrfPu5ICtfpp4cnEq/Sck1JJkmkpnHJ6SsoXAP0lPQK0BNYAXy9cKik\njsBhwFQACwu7DwJGS3oJWJNS/mWgk5kdAdwNJH8lM4kjeb9hkiolVa5atar2T+i26913Q3WW98ra\nvtat4eSTQ9uRrzHimopMkshytn5KKAVWphYws5VmdpaZHQncGO1bnVLkXGCcmW1MOecFMzvBzHoA\nzwBvRfv/bWZro+3JhCeWdpnEkXLtB8ys3MzKS0pKMviIbnt87ZDMDRgAb78NixbFHYlz+ZFJEpkH\ndJXURVILwhPEhNQCktpFjeUA1wMPpl1jMFuqspLntI9+tgSuA+6P3u8phVmZJPWIYvwkkzhcblRU\nhPmh9t8/7kgK35lnhp9epeWailqTiJlVA1cQqqIWA4+Z2SJJIyVFvzKcBCyRtBToANycPF9SZ8IT\nxJy0S18raTGwEJhoZjOj/ecAr0taANxF6MFl24qj7h/Z1cUnn4R5obwqKzMdO8Ixx3gScU2HrJFX\n3paXl1tlZWXcYRStMWPgwgth3jwoL6+1uANuvRVGjID334e99669vHOFRtJ8M8voN95HrLvtGj8+\nzAvVvXvckRSP5FPbBK9sdU2AJxG3TevXw1NPhcZiXzskcwceGF5epeWaAk8ibpuefjpMKOjtIXWX\nSMDs2WHCSucaM5/QuxGYNg3uuCNM1Z5Ny5aF+aB69szudZuCRCK0jUyeDOedF3c0zuWOJ5EiN3t2\nqG5q3z6se55NpaUwfHgYhe3qpkcP2HPP0KbkScQ1Zp5EilhlZZjLat99QzfcPdInmnGx2WGHMGbk\nkUfCcsKtWsUdkXO54W0iReqNN8JcTe3aheosTyCFJ5GAtWth5szayzpXrDyJFKF33gnreuy4Y2j8\n3sunoSxIvXvDrrt6Ly3XuHkSKTIffhgSyBdfhCeQ/faLOyK3LS1bblljZPPmuKNxLjc8iRSRTz8N\nS9P+618wZQocdljcEbnaDBgA//d/8OKLcUfiXG54EikSa9fC974HS5aEHj9HHx13RC4T/fpB8+Ze\npeUaL08iRWDDhtBIO28ejB0b1qxwxeFb34JevWDcOF9jxDVOnkQKXHU1DB4MM2bAgw/CwIFxR+Tq\nKpGAt96CN9+MOxLnss+TSAHbvBkuvjj8Ffvb38IFF8QdkasPX2PENWaeRAqUGVx9NTz0ENx0Uxg5\n7opTaWmYRj+5QqRzjUlGSUTS6ZKWSKqSNKKG450kzZC0UNJsSaXR/l6SXk15fSkpER3rLellSa9L\nGiOpebT/vOg6CyX9U9IRKfd5V9Jr0bUa9SIhI0eGp48rr4Sf/zzuaFxDJRKhh9bKGhd0dq541ZpE\nJDUD7gH6AmXAYEllacVuBx42s8OBkcAtAGY2y8y6mVk3oDewHpgWLaU7hrBq4aHAe8CQ6FrvAD2j\na/0CeCDtXr2iazbaJZJ++9vw9HHhhWFiRZ+Gvfj5GiOuscrkSaQHUGVmy8zsK2AsMCCtTBkwI9qe\nVcNxCMveTjGz9cAewAYzWxodmw6cDWBm/zSz5ATac4HSTD9MY/DQQ+Hp46yz4A9/CHMwueJXVhbW\nqPd2EdfYZPIVtRfwQcr75dG+VAuIkgAwENhNUvpsToOAR6Ptj4EdJSWfJs4hrMOebigwJeW9EZ5k\n5ksalkHsReUf/4ChQ8OI9EceCeMLXOMghYGHM2fCv/8ddzTOZU8mSaSmypT0Hu/XAD0lvQL0BFYA\nX69uIakjcBgwFcDCwu6DgNGSXgLWpJaPzulFSCLXpew+zsyOIlStXS7pxBoDloZJqpRUuWrVqgw+\nYvyefjp05e3RIySTli3jjshlWyIBGzeG2QacaywySSLL2fopoRTYqnnQzFaa2VlmdiRwY7RvdUqR\nc4FxZrYx5ZwXzOwEM+sBPAO8lTwm6XDgj8AAM/sk9T7Rz4+AcYSqtm8wswfMrNzMyktKSjL4iPGa\nOzd8wRx4YFjEaNdd447I5cJ3vwslJV6l5RqXTJLIPKCrpC6SWhCeILZqHpTULmosB7geeDDtGoPZ\nUpWVPKd99LMl4Wnj/uj9PsA/gB+ktJkgaRdJuyW3gdOA1zP5kIVs4ULo2zcsYDRtGrRtG3dELlea\nNQtjRp58MsxC4FxjUGsSMbNq4ApCVdRi4DEzWyRppKRoGBUnAUskLQU6ADcnz5fUmfAkMyft0tdK\nWgwsBCaaWXLVhZ8TGt7vTevK2wF4TtIC4CXgSTN7qo6ft6BUVcFpp8Euu4TqrD33jDsil2sDBsCa\nNWFFSucaA1kjn9CnvLzcKisLb0jJihVw3HFhYsVnn4WDD447IpcPX3wRFhIbMgTuvTfuaJyrmaT5\nmQ6j8A6kMfj449AD69NP4amnPIE0JTvtFFakHD/e1xhxjYMnkTz797/Dl8g778DEiWE6DNe0JBJh\n5HoBPiA7V2eeRPJow4bQsLpgATz+OPTsGXdELg7f+15oZB83Lu5I4vXSS/DEE3FH4RrKk0ge/elP\nMGcO/O//whlnxB2Ni8vuu4cOFQ88AEUyjCmrFi8OMzIcfTSccw6MHh13RK4hPInkSXU13HZbGEz4\ngx/EHY2L2+23h15a11wTdyT588EHcNFFcOihoTfiyJEhiVx9dVgrxxUnn1gjT554ApYtC18ePqGi\nKyuDa6+FX/0q9NTq3TvuiHLn00/hllvg7rvDEgfDh8MNN4SBlxs2hHbCiy+GNm3g7LNrv54rLN7F\nNw/M4KijQvfON97wSRVd8MUX4a/y5s1DO1mrVnFHlF3r14cZqW+9NSSKH/wgPH106rR1uXXrQvXe\nvHkwaVLYdvHyLr4FZvp0ePVV+K//8gTitthpJ7jvPli6FEaNijua7Nm4EX7/+zBr8Q03wIknhiQ5\nZsw3EwiEwbZPPhmezgYOhH/+M/8xu/rzr7Q8GDUK9toLzjsv7khcoTnttDDx5i23wJIlcUfTMGah\n1+Ehh8Cll8K++4aBtBMmwGGHbf/cb30Lpk4Nvyff+16YDsgVB08iOfbSSzBrFlx1lc/M62p2553h\nqeTSS8MXcTGaMSN0Gjn3XGjRIiSOZ5+F44/P/BodOoSn9l13Dcn1rbdqP8fFz5NIjt16a/gra1ij\nW/3EZcuee4b/T2bPhj//Oe5o6ubll8MX/imnwEcfhUXVFiyA/v3r14GkU6eQSDZtCtdcvjzrIbss\n8ySSQ2++GQaUXXEF7LZb3NG4QnbxxWGq+J/8BD75pPbycauqgkGDoHv3kEjuvDNUxw0ZEgZSNsRB\nB4Wqrc8/D9MDNcWxNMXEk0gO3XZbqML68Y/jjsQVuh12CI3Rn38eOmAUqg8/hMsuC/O9TZoEP/tZ\n6Lp+1VXZ7V121FFhWqB33w3TBK1eXespLiaeRHJkxYpQNTF0KLRvH3c0rhgcdtiWgXdz0hdOiNnq\n1XDjjaHH1R//CJdcAm+/Hbrstm6dm3ueeGIYX7VwYZgu6IsvcnMf1zCeRHJk9OgwS+tPfhJ3JK6Y\n/Pzn0LlzaGQvlIWrnnkG9tsvDIwcMCBU0/7ud6EhPNf69YO//CU00v+//xe6D7vC4kkkBz77LFRN\nfP/70KVL3NG4YrLLLmGdkTffDNWhcZs/P8zzVlIS2j4eeSQklHz6/vfh/vvDWJILLgiN7q5wZJRE\nJJ0uaYmkKkkjajjeSdIMSQslzZZUGu3vFa1OmHx9KSkRHest6WVJr0saI6l5tF+S7orutVDSUSn3\nGSLpreg1JDv/BNl3771hsanrros7EleM+vYNf3X/8pfxdnNdvBj69AkTRk6fDkceGV8sw4aF8VZj\nx8LllxdvV+hGycy2+wKaAW8D+wItgAVAWVqZx4Eh0XZv4M81XGd34FNgZ0Ly+gA4IDo2EhgabfcD\npgACjgFeTDl/WfSzbbTdtrb4u3fvbvm0bp1ZSYlZv355va1rZFasMGvd2uyUU8w2b87//d95x2yv\nvcw6dDB76638339bRowwg/DT5Q5QabV8tyZfmTyJ9ACqzGyZmX0FjAUGpJUpA2ZE27NqOA5wDjDF\nzNYT1lDfYGZLo2PTgeTUawOAh6PPMhf4lqSOQB9gupl9amafReecnkH8efWnP4Uuif4U4hri298O\nbRBPPx2qkPLpX/8KXWvXrYNp00JjeqH41a9Ce9GoUWFsjYtfJklkL8JTQ9LyaF+qBWxJAgOB3STt\nkVZmEPBotP0xsKOk5ARf5wB713K/TOKIVXV1mKX3u9+FE06IOxpX7C69NIwCv/rqMBNuPnz2WajC\nWrkSJk+Gww/Pz30zJYVG/UGDYMSI0Pbo4pVJEqlp3Gl6jeQ1QE9JrwA9gRVA9dcXCE8ShwFTAaLH\npUHAaEkvAWtSym/rfpnEkbzfMEmVkipX5XGk0mOPhX7tI0b4dO+u4Zo1C1+Sn3wS/p/KtXXrwrxV\nb74JFRXhj6FC1KwZPPxw6Ll12WWhncTFJ5MkspwtTwkApcDK1AJmttLMzjKzI4Ebo32pw4POBcaZ\n2caUc14wsxPMrAfwDJBsQtzW/WqNI+XaD5hZuZmVl5SUZPARG84sPF6XlfmqhS57unWDK6+EP/wB\nnn8+d/fZsCHMoPvii/Doo6E6q5DtuGOY7PGEE8IU85Mnxx1R05VJEpkHdJXURVILwhPEhNQCktpJ\nSl7reiB9nbLBbKnKSp7TPvrZErgOuD86NAG4IOqldQyw2sw+JDzFnCapraS2wGnRvoLw1FNhUJRP\n9+6y7aabYO+9wwC/r77K/vWrq8MM09Onh6Wbzzor+/fIhZ13DqPaDz88LGb1zDNxR9Q01fp1Z2bV\nwBWEL+zFwGNmtkjSSElnRsVOApZIWgp0AG5Oni+pM+EJIn0M7rWSFgMLgYlmNjPaP5nQ86oK+APw\noyiOT4FfEJLaPGBktK8gjBoVftEHD447EtfY7Lor3HMPLFoU5qjKps2bQ/fZJ56A3/wGLrwwu9fP\ntdatwx9wnTuHSR9ffjnuiJoeX9kwC154AY49NoxSv/LKnN7KNWFnnQVTpoRksu++Db+eWZhRYfRo\n+O//Dk88xWr58jDt/Lp1YXT7QQfFHVFx85UN8+zWW8OArIsuijsS15jddVdYSvdHP8rOYLtf/jIk\nkOHDQxIpZqWloTquWbPQnvPuu3FH1HR4EmmgN96A8ePDTL277hp3NK4xKy0NX/xTp4aegA1x111h\nnq4hQ0IiaQy9Cbt2Df82a9eGySz/539gzZq4o2r8PIk00G23hVXprrgi7khcU3DFFWENj//8zzBt\nfH08/HA4f+DAMCNvY+oIcsQRoYfZaaeF6rn99oO7785NhwQXNKL/ffLvgw/CDKMXXwzt2sUdjWsK\nkmNHVq2CG26o+/kVFfDDH8LJJ4eR8M2bZz/GuB1wQOgoMHduWO99+PDQRvLXv4aOBC67PIk0QLKn\nzNVXxxuHa1q6dw/Vp/ffH74oMzVjRpgRt7w8JJNsLiJViI4+GmbODJ0R2rSB888Pi11NmeITOGaT\nJ5F6+uSTMABs8OCwLrRz+fSLX4T5tS65JLM1Nl58MawFcsABYWBeU2m/k8LKiPPnhyevNWvCSPde\nveqWgN22eRKpp3vuCd0JC3kpU9d47bZbqOtfuDCM79ie118P08vvuWeYUHH33fMTYyHZYYfwB9/i\nxWHurcWLw7QuAweGbVd/nkTqYd260Lulf3849NC4o3FNVSIRlo296aZtd2l9++3Q5XWnnUIX2I4d\n8xlh4WnRIqxHklzad8aM8Ds8dGho43R150mkHh58MFRn+XTvLk5SeBqRQq+t9Hr+lStDAtm4MSQQ\nX2Vzi113hZ/9LCST4cNDB5muXeHaa/M3Y3Jj4UmkjjZuDNO9H388HHdc3NG4pm6ffcJf1E8+Cf/4\nx5b9n3wSEsiqVaEhuawsvhgLWUlJGCezZEnodHDHHWE2gFtugfXr446uOHgSqaOxY+H99/MzNbdz\nmRg+PMz2O3w4/PvfofG4b9/wV/bEifCd78QdYeHr3BnGjIEFC8LMwDfcEBbj+v3vM+u40JR5EqmD\nzZvDFCeHHhp6eDhXCJo3D192H34YqmMGDAgTET7+OJx0UtzRFZfDDguJ99lnQ/XfpZeGsSaPP+7d\ngrelEQ41yp3Jk8Pkd3/+c+MFVZOsAAASvklEQVSYJsI1Hj16hDm17rkn/L/5l7+Ejh+ufo4/Hp57\nDiZNguuvh3PPDWNMunfP/r3atg1dtlu0yP6188Fn8a2D448Ps4W+9VZYFMe5QrJ6deiy+h//4ZOB\nZtOmTSEp33Zb9hvdq6tDu1VFRXiCLBR1mcXXn0Qy9NxzYWW5u+7yBOIKU5s2YYS2y65mzcJElUOG\nZP/aGzdC+/aFl0TqIqM2EUmnS1oiqUrSN5qUJXWSNEPSQkmzJZVG+3tJejXl9aWkRHTsZEkvR/uf\nk7R/tH90Svmlkj5Puc+mlGMT0uPIpVtvDfNjDR2az7s65xqzHXcM69pPnBieSopRrUlEUjPgHqAv\nUAYMlpTeYfB24GEzOxwYCdwCYGazzKybmXUDegPrgWnROfcB50XHHgF+Gp1zVco5dwMpHRf5InnM\nzM4kT15/PdSNDh8eluR0zrlsSSRCl+znn487kvrJ5EmkB1BlZsvM7CtgLJD+4FUGzIi2Z9VwHOAc\nYIqZJXtfG9A62m4DrKzhnG+szR6HX/8adtkljHR1zrls6tMHWrYMVVrFKJMksheQOiHA8mhfqgXA\n2dH2QGA3SXuklRnE1gnhImCypOXAD4BRqYUldQK6AKm1vK0kVUqam6wWy7X33gsTtw0b1jTnHHLO\n5dZuu4Wp+cePL85uxJkkkZo6s6Z/1GuAnpJeAXoCK4Cva/gkdQQOA6amnHMV0M/MSoE/AXemXXMQ\n8Hcz25Syb5+ox8B/AL+RtF+NAUvDomRTuWrVqlo/4PbccUeYvM2ne3fO5UoiAe+8A6+9FnckdZdJ\nElkO7J3yvpS0qiczW2lmZ5nZkcCN0b7VKUXOBcaZ2UYASSXAEWb2YnT8b8CxafdNf3LBzFZGP5cB\ns4EjawrYzB4ws3IzKy8pKcngI9bs44/Dym/nnReWJnXOuVzo3z+M7ynGKq1Mksg8oKukLpJaEL7c\nt+oZJamdpOS1rgceTLtGetvGZ0AbSQdE708Fvp6QWdKBQFvghZR9bSW1TN4POA54I4P46+3uu+GL\nL3y6d+dcbu25Z5iavlEmETOrBq4gVEUtBh4zs0WSRkpK9pA6CVgiaSnQAbg5eb6kzoQnmTlp17wY\neELSAkKbyLUptx0MjLWtR0IeDFRG5WcBo8wsZ0lk7dqQRBIJOPjgXN3FOeeCRAJeeSXMzVdMfMT6\nNvzmN3DVVfDCC3DMMTkIzDnnUixdCgceGAY0//jH8cZSlxHrPgFjDb76KjSo9+zpCcQ5lx8HHBBq\nPYqtSsuTSA02bQpjQn7+87gjcc41JYkEzJlTXAtjeRKpwU47hfVCeveOOxLnXFOSSIQ/Yp98Mu5I\nMudJxDnnCkR5OXTsGAYeFgtPIs45VyB22CHM5vvUU2F4QTHwJOKccwUkkYB162DGjNrLFgJPIs45\nV0B69YLWrYunl5YnEeecKyAtWkC/fmGNkU2bai8fN08izjlXYAYMgI8+grlz446kdp5EnHOuwPTt\nG1Y9LIYqLU8izjlXYNq0CePUxo0r/DVGPIk451wBSiTg7bfhjZzOVd5wnkScc64AnRnNkV7oAw89\niTjnXAH69rehR4/CbxfxJOKccwUqkYB582D58rgj2TZPIs45V6ASifBzwoTtl4tTRklE0umSlkiq\nkjSihuOdJM2QtFDSbEml0f5ekl5NeX0pKREdO1nSy9H+5yTtH+2/UNKqlHMuSrnPEElvRa8h2fkn\ncM65wnTQQWGdkUJuF6k1iUhqBtwD9AXKgMGSytKK3Q48bGaHAyOBWwDMbJaZdTOzbkBvYD0wLTrn\nPuC86NgjwE9Trve35Hlm9scojt2B/waOBnoA/y2pbX0+tHPOFQMpDDycORM+/zzuaGqWyZNID6DK\nzJaZ2VfAWGBAWpkyIDld2KwajgOcA0wxs/XRewNaR9ttgJW1xNEHmG5mn5rZZ8B04PQM4nfOuaKV\nSEB1NUyZEnckNcskiewFfJDyfnm0L9UC4OxoeyCwm6Q90soMAh5NeX8RMFnScuAHwKiUY2dHVWN/\nl7R3HeIAQNIwSZWSKletWrX9T+eccwXs6KOhQ4fC7aWVSRJRDfvSx1BeA/SU9ArQE1gBVH99Aakj\ncBgwNeWcq4B+ZlYK/Am4M9o/EegcVY09DYypQxxhp9kDZlZuZuUlJSXb+2zOOVfQmjULY0amTIEN\nG+KO5psySSLLgb1T3peSVvVkZivN7CwzOxK4Mdq3OqXIucA4M9sIIKkEOMLMXoyO/w04NjrvEzNL\n/lP9AeieaRzOOdcYDRgAa9bArFlxR/JNmSSReUBXSV0ktSBUS23V4UxSO0nJa10PPJh2jcFsXZX1\nGdBG0gHR+1OBxdG1OqaUOzO5n/AUc5qktlGD+mls/WTjnHON0sknwy67FGaVVq1JxMyqgSsIX9iL\ngcfMbJGkkZKigfmcBCyRtBToANycPF9SZ8ITxJy0a14MPCFpAaFN5Nro8HBJi6L9w4ELo3M+BX5B\nSGrzgJHRPueca9RatQoz+44fD5s3xx3N1mSFPkVkA5WXl1tlZWXcYTjnXIP89a9w/vnwwgtwzDG5\nvZek+WZWnklZH7HunHNFoF8/aN688AYeehJxzrki0LYt9OxZeO0inkScc65IJBLw5pvhVSg8iTjn\nXJEYEM0FUkhVWp5EnHOuSOy9N3Tv7knEOedcPQ0YAHPnwocfxh1J4EnEOeeKSCIBZjBxYtyRBJ5E\nnHOuiBx6KOy7b+H00vIk4pxzRUQKTyMzZoT5tOLmScQ554pMIgFffQVPPRV3JJ5EnHOu6Bx7LLRr\nVxhVWp5EnHOuyDRrBv37w5NPhieSOHkScc65IpRIwOrVMGdO7WVzyZOIc84VoVNPhZ13jn/goScR\n55wrQjvtBKedFtpF4lzRI6MkIul0SUskVUkaUcPxTpJmSFooabak0mh/L0mvpry+lJSIjp0s6eVo\n/3OS9o/2Xy3pjehaMyR1SrnPppRrTUiPwznnmpJEAlasgPnz44uh1iQiqRlwD9AXKAMGSypLK3Y7\n8LCZHQ6MBG4BMLNZZtbNzLoBvYH1wLTonPuA86JjjwA/jfa/ApRH1/o78OuU+3yRvJ6ZnYlzzjVh\nZ5wBO+wQby+tTJ5EegBVZrbMzL4CxgID0sqUATOi7Vk1HAc4B5hiZuuj9wa0jrbbACvh68STLDMX\nKM3kgzjnXFOzxx5w4omFn0T2Aj5Ieb882pdqAXB2tD0Q2E3SHmllBgGPpry/CJgsaTlhjfVRNdx7\nKDAl5X0rSZWS5iarxZxzrilLJGDRIqiqiuf+mSQR1bAvvRnnGqCnpFeAnsAKoPrrC0gdgcOAqSnn\nXAX0M7NS4E/AnVvdVDofKAduS9m9T7Tu738Av5G0X40BS8OiZFO5atWqDD6ic84Vp7jXGMkkiSwH\n9k55X0pU9ZRkZivN7CwzOxK4Mdq3OqXIucA4M9sIIKkEOMLMXoyO/w04NllY0inRdc40sw2p94l+\nLgNmA0fWFLCZPWBm5WZWXlJSksFHdM654tS5MxxxRHxVWpkkkXlAV0ldJLUgVEtt1TNKUjtJyWtd\nDzyYdo3BbF2V9RnQRtIB0ftTgcXRtY4Efk9IIB+l3KOtpJbJ+wHHAW9kEL9zzjVqiQQ8/zx89FHt\nZbOt1iRiZtXAFYSqqMXAY2a2SNJISckeUicBSyQtBToANyfPl9SZ8CQzJ+2aFwNPSFpAaBO5Njp8\nG7Ar8HhaV96Dgcqo/CxglJl5EnHONXnJNUYmTcr/vWVxjlLJg/LycqusrIw7DOecyxkz6NIFDj8c\nJmRhBJ2k+VH7c618xLpzzhU5KTSwT5sGa9fm996eRJxzrhFIJGDDhpBI8smTiHPONQInnABt2+a/\nl5YnEeecawSaNw9rjEyaBNXVtZfPFk8izjnXSAwYAJ99Bs8+m797ehJxzrlGok8faNUqv1VankSc\nc66R2GWXsFhVPtcY8STinHONSCIB778PCxbk536eRJxzrhE544wwbiRfVVqeRJxzrhFp3x6OO86T\niHPOuXq68EI45hjYuDH392qe+1s455zLp6FDwysf/EnEOedcvXkScc45V2+eRJxzztWbJxHnnHP1\nllESkXS6pCWSqiSNqOF4J0kzJC2UNFtSabS/V7Q6YfL1paREdOxkSS9H+5+TtH+0v6Wkv0X3ejFa\nGTF5n+uj/Usk9cnGP4Bzzrn6qzWJSGoG3AP0BcqAwZLK0ordDjxsZocDI4FbAMxslpl1M7NuQG9g\nPZCc7f4+4Lzo2CPAT6P9Q4HPzGx/YDRwaxRHGWF990OA04F7o9icc87FJJMnkR5AlZktM7OvgLHA\ngLQyZcCMaHtWDccBzgGmmNn66L0BraPtNsDKaHsAMCba/jtwsiRF+8ea2QYzeweoimJzzjkXk0yS\nyF7ABynvl0f7Ui0Azo62BwK7Sdojrcwg4NGU9xcBkyUtB34AjEq/n5lVA6uBPTKMAwBJwyRVSqpc\ntWpVrR/QOedc/WQy2FA17EufH/Ia4HeSLgSeAVYAXy+LIqkjcBgwNeWcq4B+ZvaipGuBOwmJZVv3\nyySOsNPsAeCB6N6rJL1XU7kMtAM+rue5+VDo8YHHmA2FHh8UfoyFHh8UVoydMi2YSRJZDuyd8r6U\nLVVPAJjZSuAsAEm7Ameb2eqUIucC48xsY1SmBDjCzF6Mjv8NeCrtfsslNSdUdX2aSRw1MbOSDD5j\njSRVmll5fc/PtUKPDzzGbCj0+KDwYyz0+KA4YqxJJtVZ84CukrpIakGolpqQWkBSO0nJa10PPJh2\njcFsXZX1GdBG0gHR+1OBxdH2BGBItH0OMNPMLNo/KOq91QXoCryUQfzOOedypNYnETOrlnQFoSqq\nGfCgmS2SNBKoNLMJwEnALZKMUJ11efL8qIvu3sCctGteDDwhaTMhqfwwOvy/wJ8lVRGeQAZF5yyS\n9BjwBqGq7HIz29SAz+6cc66BZPla/qoISRoWta8UpEKPDzzGbCj0+KDwYyz0+KA4YqyJJxHnnHP1\n5tOeOOecq7cmmUQymMZldMpULUslfZ5ybIikt6LXkPRz445RUjdJL0haFE1D8/1CizHleGtJKyT9\nrtDik7SPpGmSFkt6I3X6nQKK8dfRf+fFku6KBuXmO759JM2S9Er0/1u/lGN5maaovjFKOlXSfEmv\nRT97F1J8acfXSromF/E1mJk1qRehc8DbwL5AC8JAybLtlP8xoTMBwO7Asuhn22i7bYHFeADQNdr+\nNvAh8K1CijFl328JU978rtDiA2YDp0bbuwI7F1KMwLHA89E1mgEvACflOz7CeKzLou0y4N2U7QVA\nS6BLdJ1mcfwbbifGI4FvR9uHAisKKb6U408AjwPXZDu+bLya4pNIJtO4pErtntwHmG5mn5rZZ8B0\nwjxeBROjmS01s7ei7ZXAR0C9x8rkIkYASd2BDmyZS61g4lOYp625mU0HMLO1tmW6noKIkTDQthXh\ni6klsCPwfzHEt73pi/IxTVG9YzSzV6LfEYBFQCtJLQslPgCFCWuXRfEVpKaYROoyfUonwl9RM+t6\nbowxph7rQfiSebuQYlQYU3QHcG0O4mpwfISnuc8l/SOqYrhNuZnss94xmtkLhHnqPoxeU81scU3n\n5ji+m4DzFaYvmkx4Wsr03LhjTHU28IqZbSiU+CTtAlwH/E+WY8qqpphEMp4+hTBG5e+2ZTxKXc5t\niIbEGC4Qppr5M/D/mdnmLMcHDYvxR8BkM/tgG+WzoSHxNQdOIEzn8x1CVcSF2Q6QBsSosHTCwYSZ\nG/YCeks6MYb4BgMPmVkp0I8wxmuHDM/NhobEGC4gHUKYLfySAovvf4DRZrY2B3FlTVNMInWZPiV9\n0sh6Tb1SDw2JEUmtgSeBn5rZ3BzEBw2L8bvAFZLeJSwjcIGkUTWdGFN8ywl/lS6zMAloBXBUluNr\naIwDgblRVdtaYApwTAzxDQUeg6+fjloR5oAqpN+VbcWIwtpH44ALzCwXT+wNie9o4NfR78mVwA0K\nA78LS9yNMvl+Ef7KXEaoGkg2dB1SQ7kDgXeJxtJE+3YH3iE0qreNtncvsBhbEKblv7JQ/x3Tjl9I\nbhrWG/Jv2CwqXxK9/xNhhoRCivH7wNPRNXaM/pv3z3d8hOR1YbR9MOELUoR1f1Ib1peRm4b1hsT4\nraj82dmOKxvxpZW5iQJtWI89gFg+dHhkXEpoK7gx2jcSODPtP9qoGs79IaGRsIpQVVRQMQLnAxuB\nV1Ne3QopxrRrXEgOkkgW/jufCiwEXgMeAloUUoyERPd7wpxzbwB3xhEfoTfR89GX46vAaSnn3hid\ntwTom4v4GhIjYSG8dWm/K+0LJb60a9xEgSYRH7HunHOu3ppim4hzzrks8STinHOu3jyJOOecqzdP\nIs455+rNk4hzzrl68yTinHOu3jyJOOecqzdPIs455+rt/wfVZ3CvtN49IAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a0ecc7910>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# 绘制不同PCA维数下模型的性能，找到最佳模型／参数（分数最高）\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "plt.plot(n_s, np.array(accuracy), 'b-')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PCA(copy=True, iterated_power='auto', n_components=0.75, random_state=None,\n",
       "  svd_solver='auto', tol=0.0, whiten=False)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#最佳模型参数\n",
    "pca = PCA(n_components=0.75)\n",
    "\n",
    "#根据最佳参数，在全体训练数据上重新训练模型\n",
    "pca.fit(X_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "33"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca.n_components_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.09748938,  0.07160266,  0.06145903,  0.05379302,  0.04894262,\n",
       "        0.04303214,  0.03277051,  0.02892103,  0.02766902,  0.02348871,\n",
       "        0.02099325,  0.02059001,  0.01702553,  0.01692787,  0.01581126,\n",
       "        0.0148324 ,  0.01319688,  0.01282727,  0.01187976,  0.01152755,\n",
       "        0.01072191,  0.01015199,  0.00964902,  0.00912846,  0.00887641,\n",
       "        0.00838766,  0.00811856,  0.00777406,  0.00740635,  0.00686661,\n",
       "        0.00657982,  0.00638799,  0.00599367])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca.explained_variance_ratio_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#根据最佳参数，对全体训练数据降维\n",
    "X_train_pca = pca.transform(X_train)\n",
    "\n",
    "#根据最佳参数，对测试数据降维\n",
    "X_test_pca = pca.transform(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(42000, 33)\n",
      "(28000, 33)\n"
     ]
    }
   ],
   "source": [
    "# 降维后的特征维数\n",
    "print(X_train_pca.shape)\n",
    "print(X_test_pca.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n",
       "  max_iter=-1, probability=False, random_state=None, shrinking=True,\n",
       "  tol=0.001, verbose=False)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#在降维后的训练数据集上训练SVM分类器\n",
    "clf = svm.SVC()\n",
    "clf.fit(X_train_pca, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 用在降维后的全体训练数据集上训练的模型对测试集进行测试\n",
    "y_predict = clf.predict(X_test_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#生成提交测试结果\n",
    "import pandas as pd\n",
    "df = pd.DataFrame(y_predict)\n",
    "df.columns=['Label']\n",
    "df.index+=1\n",
    "df.index.name = 'Imageid'\n",
    "df.to_csv('SVC_Minist_submission.csv', header=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
